# Packages and dependence
packageCheckClassic <- function(x){
#
for( i in x ){
if( ! require( i , character.only = TRUE ) ){
install.packages( i , dependencies = TRUE )
require( i , character.only = TRUE )
}
}
}
packageCheckClassic(c('gridExtra','DESeq2','adegenet','devtools','BiocManager','ggplot2','ggrepel','markdown','pheatmap','RColorBrewer','genefilter','gplots','vegan','dplyr','limma'))
## Loading required package: gridExtra
## Loading required package: DESeq2
## Loading required package: S4Vectors
## Warning: package 'S4Vectors' was built under R version 4.1.3
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
## Loading required package: adegenet
## Loading required package: ade4
##
## Attaching package: 'ade4'
## The following object is masked from 'package:GenomicRanges':
##
## score
## The following object is masked from 'package:BiocGenerics':
##
## score
##
## /// adegenet 2.1.10 is loaded ////////////
##
## > overview: '?adegenet'
## > tutorials/doc/questions: 'adegenetWeb()'
## > bug reports/feature requests: adegenetIssues()
## Loading required package: devtools
## Loading required package: usethis
## Loading required package: BiocManager
## Bioconductor version '3.14' is out-of-date; the current release version '3.18'
## is available with R version '4.3'; see https://bioconductor.org/install
##
## Attaching package: 'BiocManager'
## The following object is masked from 'package:devtools':
##
## install
## Loading required package: ggplot2
## Loading required package: ggrepel
## Loading required package: markdown
## Loading required package: pheatmap
## Loading required package: RColorBrewer
## Loading required package: genefilter
##
## Attaching package: 'genefilter'
## The following objects are masked from 'package:MatrixGenerics':
##
## rowSds, rowVars
## The following objects are masked from 'package:matrixStats':
##
## rowSds, rowVars
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:IRanges':
##
## space
## The following object is masked from 'package:S4Vectors':
##
## space
## The following object is masked from 'package:stats':
##
## lowess
## Loading required package: vegan
## Loading required package: permute
##
## Attaching package: 'permute'
## The following object is masked from 'package:devtools':
##
## check
## Loading required package: lattice
## This is vegan 2.6-4
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:Biobase':
##
## combine
## The following object is masked from 'package:matrixStats':
##
## count
## The following objects are masked from 'package:GenomicRanges':
##
## intersect, setdiff, union
## The following object is masked from 'package:GenomeInfoDb':
##
## intersect
## The following objects are masked from 'package:IRanges':
##
## collapse, desc, intersect, setdiff, slice, union
## The following objects are masked from 'package:S4Vectors':
##
## first, intersect, rename, setdiff, setequal, union
## The following objects are masked from 'package:BiocGenerics':
##
## combine, intersect, setdiff, union
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Loading required package: limma
## Warning: package 'limma' was built under R version 4.1.3
##
## Attaching package: 'limma'
## The following object is masked from 'package:DESeq2':
##
## plotMA
## The following object is masked from 'package:BiocGenerics':
##
## plotMA
#BiocManager::install('tximport', force = TRUE)
#BiocManager::install('apeglm')
#BiocManager::install('ashr')
#BiocManager::install("EnhancedVolcano")
#BiocManager::install("arrayQualityMetrics")
if (!require(devtools)) install.packages("devtools")
devtools::install_github("yanlinlin82/ggvenn")
## Skipping install of 'ggvenn' from a github remote, the SHA1 (25fd3b55) has not changed since last install.
## Use `force = TRUE` to force installation
library("adegenet")
library('ggvenn')
## Loading required package: grid
library('tximport')
library('apeglm')
library('ashr')
library('pairwiseAdonis')
## Loading required package: cluster
library('EnhancedVolcano')
## Registered S3 methods overwritten by 'ggalt':
## method from
## grid.draw.absoluteGrob ggplot2
## grobHeight.absoluteGrob ggplot2
## grobWidth.absoluteGrob ggplot2
## grobX.absoluteGrob ggplot2
## grobY.absoluteGrob ggplot2
library('BiocManager')
source_url("https://raw.githubusercontent.com/obigriffith/biostar-tutorials/master/Heatmaps/heatmap.3.R")
## ℹ SHA-1 hash of file is "015fc0457e61e3e93a903e69a24d96d2dac7b9fb"
# Functions
trimFunction <- function(resLFC,p_adj_cut,lfc_cut) {
resOrdered<-resLFC[order(resLFC$padj),]
y<-na.omit(resOrdered)
resTrim<-y[y$padj < p_adj_cut,]
resTrim <- resTrim[ which( resTrim$log2FoldChange > lfc_cut | resTrim$log2FoldChange < -lfc_cut), ]
resTrim <- as.data.frame(resTrim)
resTrim$genes <- rownames(resTrim)
return(resTrim)
}
applyAnnot <- function(inputDf,inputDfAnnot) {
gene_and_annot_col <- character(0)
inputDf_annot_genes <- inputDfAnnot$genes
inputDf_annot_pfam <- inputDfAnnot$pfam_annotation
for (i in 1:length(inputDf_annot_genes)) {
gene_and_annot_col <- c(gene_and_annot_col,
paste(inputDf_annot_genes[i], " - ", inputDf_annot_pfam[i]))
}
inputDf_genes <- inputDf$genes
new_gene_annot_col <- character(0)
for (i in inputDf_genes) {
gene <- i
for (j in gene_and_annot_col) {
j_split <- strsplit(j, " ", fixed = TRUE)[[1]]
if (i %in% j_split[1]) {
gene <- j
}
}
new_gene_annot_col <- c(new_gene_annot_col, gene)
}
inputDf$genes <- new_gene_annot_col
return(inputDf)
}
heatmapFunction <- function(newColNames,commonGenes,commonGenesAll,vsd,nameCode) {
vsdCommonGm <- vsd[commonGenes$genes,]
vsdCommonGm@colData@rownames = newColNames
annot_genes = commonGenesAll$genes
genes = names(vsdCommonGm)
new_names = list()
for (i in annot_genes) {
gene = i
for (j in genes) {
if (i == j) {
gene = j
}
}
new_names <- c(new_names,gene)
}
names(vsdCommonGm) <- new_names
pheatmap(assay(vsdCommonGm),main=nameCode,scale="row", cluster_rows=T, show_rownames=T,
cluster_cols=FALSE,cellwidth = 20,fontsize_row = 4)
return(vsdCommonGm)
}
similarityFunction <- function(dataset1,dataset2) {
c = 0
for (i in rownames(dataset1)) {
if (i %in% rownames(dataset2)) {
c = c + 1
}
}
sim = (c / as.integer(length(rownames(dataset2)))) * 100
return(sim)
}
#### May 2018 ####
scriptPath<-dirname(rstudioapi::getSourceEditorContext()$path)
setwd(scriptPath)
A_pfam<-read.csv('Astroides_pfam.csv',header=T,sep=',')
samplesSP<-read.table('tximport_design_SP_may2018.txt',header=T)
samplesPV<-read.table('tximport_design_PV_may2018.txt',header=T)
samplesGM<-read.table('tximport_design_GM_may2018.txt',header=T)
dataPath<-'/Users/mmeynadier/Documents/PhD/species/Astroides/analysis/STARmapping/teixido/adult/may2018'
outputPath<-'/Users/mmeynadier/Documents/Astroides/comparative_transcriptomics_astroides/output/DESeq2/annotatedGenome/adult/trueTransplant/'
setwd(dataPath)
data<-list.files(pattern = "*ReadsPerGene.out.tab$", full.names = TRUE)
counts.files <- lapply(data, read.table, skip = 4)
raw_counts <- as.data.frame(sapply(counts.files, function(x) x[ , 2]))
data <- gsub( "Users/mmeynadier/Documents/PhD/species/Astroides/analysis/STARmapping/teixido/adult/may2018", "", data )
data <- gsub( "_ReadsPerGene.out.tab", "", data )
data <- gsub( "./", "", data )
colnames(raw_counts) <- data
row.names(raw_counts) <- counts.files[[1]]$V1
raw_counts_sp <- raw_counts[,grep("may2018_sp", colnames(raw_counts))]
raw_counts_pv <- raw_counts[,grep("may2018_pv", colnames(raw_counts))]
raw_counts_gm <- raw_counts[,grep("may2018_gm", colnames(raw_counts))]
samples_order_sp = c(5,6,7,8,9,10,11,12,1,2,3,4)
samples_order_pv = c(6,7,8,9,10,11,12,13,14,1,2,3,4,5)
samples_order_gm = c(1,2,3,4,5,6,7,8,14,15,16,17,18,9,10,11,12,13)
raw_counts_sp <- raw_counts_sp[,samples_order_sp]
raw_counts_pv <- raw_counts_pv[,samples_order_pv]
raw_counts_gm <- raw_counts_gm[,samples_order_gm]
#### SP ####
# DDS object
dds<-DESeqDataSetFromMatrix(countData = raw_counts_sp, colData = samplesSP,design = ~originSite_finalSite_experiment)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# pre-filtering
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]
# Differential expression analysis
dds<-DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
cbind(resultsNames(dds))
## [,1]
## [1,] "Intercept"
## [2,] "originSite_finalSite_experiment_sp_sp_bck_vs_sp_gm_trt"
## [3,] "originSite_finalSite_experiment_sp_sp_tro_vs_sp_gm_trt"
sp_sp_bck_VS_sp_sp_tro_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","sp_sp_bck","sp_sp_tro"), lfcThreshold = 1,alpha = 0.05)
sp_sp_bck_VS_sp_sp_tro_lfc2_intra<-trimFunction(sp_sp_bck_VS_sp_sp_tro_lfc2_intra,0.05,1)
sp_sp_bck_VS_sp_gm_trt_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","sp_sp_bck","sp_gm_trt"), lfcThreshold = 1,alpha = 0.05)
sp_sp_bck_VS_sp_gm_trt_lfc2_intra<-trimFunction(sp_sp_bck_VS_sp_gm_trt_lfc2_intra,0.05,1)
rownames1_intra = row.names(sp_sp_bck_VS_sp_gm_trt_lfc2_intra)
rownames2_intra = row.names(sp_sp_bck_VS_sp_sp_tro_lfc2_intra)
rownames_intra <- union(rownames1_intra,rownames2_intra)
rownames_intra <- data.frame(genes=rownames_intra)
rownames_annot_intra = merge(rownames_intra,A_pfam,by="genes")
rownames_annot_all_intra <- applyAnnot(rownames_intra,rownames_annot_intra)
vsd = vst(dds,blind=T)
newColNames <- samplesSP$originSite_finalSite_experiment
vsd_common_gm_lfc2_intra_may2018_SP = heatmapFunction(newColNames,rownames_intra,rownames_annot_all_intra,vsd,"May2018 SP")

pcaData = plotPCA(vsd, intgroup="originSite_finalSite_experiment",returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = originSite_finalSite_experiment)) +
geom_point(size = 5) + theme_bw() +
scale_color_manual(values = c("#ff4040","#8B0000","#00008B","#6495ED")) +
geom_point() +
ggtitle("Principal Component Analysis of adult corals", subtitle = "may2018 dataset - SP") +
theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance"))

count_tab_assay <- assay(vsd)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samplesSP,dist_tab_assay ~ originSite_finalSite_experiment, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_tab_assay ~ originSite_finalSite_experiment, data = samplesSP, method = "euclidian")
## Df SumOfSqs R2 F Pr(>F)
## originSite_finalSite_experiment 2 20503 0.23019 1.3456 0.029 *
## Residual 9 68566 0.76981
## Total 11 89069 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesSP$originSite_finalSite_experiment)
## Set of permutations < 'minperm'. Generating entire set.
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
## 1 sp_sp_bck vs sp_sp_tro 1 8135.988 1.007740 0.1438039 0.401 1.000
## 2 sp_sp_bck vs sp_gm_trt 1 9009.643 1.377171 0.2159532 0.031 0.093
## 3 sp_sp_tro vs sp_gm_trt 1 13097.947 1.637823 0.1896107 0.027 0.081
mod <- betadisper(dist_tab_assay,samplesSP$originSite_finalSite_experiment)
mod.HSD <- TukeyHSD(mod)
mod.HSD
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = distances ~ group, data = df)
##
## $group
## diff lwr upr p adj
## sp_sp_bck-sp_gm_trt -6.011822 -35.780104 23.75646 0.8421258
## sp_sp_tro-sp_gm_trt 12.530639 -13.615143 38.67642 0.4108587
## sp_sp_tro-sp_sp_bck 18.542461 -9.921461 47.00638 0.2179367
#### PV ####
# DDS object
dds<-DESeqDataSetFromMatrix(countData = raw_counts_pv, colData = samplesPV,design = ~originSite_finalSite_experiment)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# pre-filtering
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]
# Differential expression analysis
dds<-DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
cbind(resultsNames(dds))
## [,1]
## [1,] "Intercept"
## [2,] "originSite_finalSite_experiment_pv_pv_bck_vs_pv_gm_trt"
## [3,] "originSite_finalSite_experiment_pv_pv_tro_vs_pv_gm_trt"
pv_pv_bck_VS_pv_pv_tro_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","pv_pv_bck","pv_pv_tro"), lfcThreshold = 1,alpha = 0.05)
pv_pv_bck_VS_pv_pv_tro_lfc2_intra<-trimFunction(pv_pv_bck_VS_pv_pv_tro_lfc2_intra,0.05,1)
pv_pv_bck_VS_pv_gm_trt_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","pv_pv_bck","pv_gm_trt"), lfcThreshold = 1,alpha = 0.05)
pv_pv_bck_VS_pv_gm_trt_lfc2_intra<-trimFunction(pv_pv_bck_VS_pv_gm_trt_lfc2_intra,0.05,1)
rownames1_intra = row.names(pv_pv_bck_VS_pv_gm_trt_lfc2_intra)
rownames2_intra = row.names(pv_pv_bck_VS_pv_pv_tro_lfc2_intra)
rownames_intra <- union(rownames1_intra,rownames2_intra)
rownames_intra <- data.frame(genes=rownames_intra)
rownames_annot_intra = merge(rownames_intra,A_pfam,by="genes")
rownames_annot_all_intra <- applyAnnot(rownames_intra,rownames_annot_intra)
vsd = vst(dds,blind=T)
newColNames <- samplesPV$originSite_finalSite_experiment
vsd_common_gm_lfc2_intra_may2018_PV = heatmapFunction(newColNames,rownames_intra,rownames_annot_all_intra,vsd,"May2018 PV")

pcaData = plotPCA(vsd, intgroup="originSite_finalSite_experiment",returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = originSite_finalSite_experiment)) +
geom_point(size = 5) + theme_bw() +
scale_color_manual(values = c("#ff4040","#8B0000","#00008B","#6495ED")) +
geom_point() +
ggtitle("Principal Component Analysis of adult corals", subtitle = "may2018 dataset - PV") +
theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance"))

count_tab_assay <- assay(vsd)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samplesPV,dist_tab_assay ~ originSite_finalSite_experiment, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_tab_assay ~ originSite_finalSite_experiment, data = samplesPV, method = "euclidian")
## Df SumOfSqs R2 F Pr(>F)
## originSite_finalSite_experiment 2 14757 0.18806 1.2739 0.051 .
## Residual 11 63713 0.81194
## Total 13 78471 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesPV$originSite_finalSite_experiment)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 pv_pv_bck vs pv_pv_tro 1 5365.800 0.9335229 0.1176681 0.523 1.000
## 2 pv_pv_bck vs pv_gm_trt 1 7077.013 1.2708689 0.1747891 0.119 0.357
## 3 pv_pv_tro vs pv_gm_trt 1 9245.056 1.5471536 0.1466892 0.016 0.048
## sig
## 1
## 2
## 3 .
mod <- betadisper(dist_tab_assay,samplesPV$originSite_finalSite_experiment)
mod.HSD <- TukeyHSD(mod)
mod.HSD
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = distances ~ group, data = df)
##
## $group
## diff lwr upr p adj
## pv_pv_bck-pv_gm_trt -10.912440 -23.6635945 1.838714 0.0960273
## pv_pv_tro-pv_gm_trt 2.342161 -8.2305379 12.914859 0.8238678
## pv_pv_tro-pv_pv_bck 13.254601 0.9083489 25.600853 0.0356006
#### GM ####
# DDS object
dds<-DESeqDataSetFromMatrix(countData = raw_counts_gm, colData = samplesGM,design = ~originSite_finalSite_experiment)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# pre-filtering
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]
# Differential expression analysis
dds<-DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
cbind(resultsNames(dds))
## [,1]
## [1,] "Intercept"
## [2,] "originSite_finalSite_experiment_gm_gm_tro_vs_gm_gm_bck"
## [3,] "originSite_finalSite_experiment_gm_pv_trt_vs_gm_gm_bck"
## [4,] "originSite_finalSite_experiment_gm_sp_trt_vs_gm_gm_bck"
gm_gm_bck_VS_gm_gm_tro_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","gm_gm_bck","gm_gm_tro"), lfcThreshold = 1,alpha = 0.05)
gm_gm_bck_VS_gm_gm_tro_lfc2_intra<-trimFunction(gm_gm_bck_VS_gm_gm_tro_lfc2_intra,0.05,1)
gm_gm_bck_VS_gm_sp_trt_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","gm_gm_bck","gm_sp_trt"), lfcThreshold = 1,alpha = 0.05)
gm_gm_bck_VS_gm_sp_trt_lfc2_intra<-trimFunction(gm_gm_bck_VS_gm_sp_trt_lfc2_intra,0.05,1)
gm_gm_bck_VS_gm_pv_trt_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","gm_gm_bck","gm_pv_trt"), lfcThreshold = 1,alpha = 0.05)
gm_gm_bck_VS_gm_pv_trt_lfc2_intra<-trimFunction(gm_gm_bck_VS_gm_pv_trt_lfc2_intra,0.05,1)
rownames1_intra = row.names(gm_gm_bck_VS_gm_gm_tro_lfc2_intra)
rownames2_intra = row.names(gm_gm_bck_VS_gm_sp_trt_lfc2_intra)
rownames3_intra = row.names(gm_gm_bck_VS_gm_pv_trt_lfc2_intra)
rownames_intra <- union(rownames1_intra,rownames2_intra)
rownames_intra <- union(rownames_intra,rownames3_intra)
rownames_intra <- data.frame(genes=rownames_intra)
rownames_annot_intra = merge(rownames_intra,A_pfam,by="genes")
rownames_annot_all_intra <- applyAnnot(rownames_intra,rownames_annot_intra)
vsd = vst(dds,blind=T)
newColNames <- samplesGM$originSite_finalSite_experiment
vsd_common_gm_lfc2_intra_may2018_GM = heatmapFunction(newColNames,rownames_intra,rownames_annot_all_intra,vsd,"May2018 GM")

pcaData = plotPCA(vsd, intgroup="originSite_finalSite_experiment",returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = originSite_finalSite_experiment)) +
geom_point(size = 5) + theme_bw() +
scale_color_manual(values = c("#ff4040","#8B0000","#00008B","#6495ED")) +
geom_point() +
ggtitle("Principal Component Analysis of adult corals", subtitle = "may2018 dataset - GM") +
theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance"))

# Inference statistics
count_tab_assay <- assay(vsd)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samplesGM,dist_tab_assay ~ originSite_finalSite_experiment, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_tab_assay ~ originSite_finalSite_experiment, data = samplesGM, method = "euclidian")
## Df SumOfSqs R2 F Pr(>F)
## originSite_finalSite_experiment 3 23277 0.26562 1.6879 0.001 ***
## Residual 14 64358 0.73438
## Total 17 87635 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesGM$originSite_finalSite_experiment)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
## 1 gm_gm_bck vs gm_gm_tro 1 4188.580 1.026904 0.1461389 0.403 1.000
## 2 gm_gm_bck vs gm_sp_trt 1 6205.750 1.513697 0.2014583 0.018 0.108
## 3 gm_gm_bck vs gm_pv_trt 1 9145.311 1.886567 0.2392127 0.031 0.186
## 4 gm_gm_tro vs gm_sp_trt 1 8509.060 1.929915 0.1943536 0.009 0.054
## 5 gm_gm_tro vs gm_pv_trt 1 11036.627 2.220680 0.2172732 0.009 0.054
## 6 gm_sp_trt vs gm_pv_trt 1 6722.093 1.348302 0.1442296 0.104 0.624
mod <- betadisper(dist_tab_assay,samplesGM$originSite_finalSite_experiment)
mod.HSD <- TukeyHSD(mod)
mod.HSD
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = distances ~ group, data = df)
##
## $group
## diff lwr upr p adj
## gm_gm_tro-gm_gm_bck 11.1964219 0.6179835 21.774860 0.0365427
## gm_pv_trt-gm_gm_bck 18.6076921 8.0292537 29.186131 0.0008054
## gm_sp_trt-gm_gm_bck 11.5266964 0.9482580 22.105135 0.0308531
## gm_pv_trt-gm_gm_tro 7.4112702 -1.7499262 16.572467 0.1332516
## gm_sp_trt-gm_gm_tro 0.3302745 -8.8309218 9.491471 0.9995672
## gm_sp_trt-gm_pv_trt -7.0809957 -16.2421921 2.080201 0.1585641
#### Sept2018 ####
scriptPath<-dirname(rstudioapi::getSourceEditorContext()$path)
setwd(scriptPath)
A_pfam<-read.csv('Astroides_pfam.csv',header=T,sep=',')
samplesSP<-read.table('tximport_design_SP_sept2018.txt',header=T)
samplesPV<-read.table('tximport_design_PV_sept2018.txt',header=T)
samplesGM<-read.table('tximport_design_GM_sept2018.txt',header=T)
dataPath<-'/Users/mmeynadier/Documents/PhD/species/Astroides/analysis/STARmapping/teixido/adult/sept2018'
outputPath<-'/Users/mmeynadier/Documents/Astroides/comparative_transcriptomics_astroides/output/DESeq2/annotatedGenome/adult/trueTransplant/'
setwd(dataPath)
data<-list.files(pattern = "*ReadsPerGene.out.tab$", full.names = TRUE)
counts.files <- lapply(data, read.table, skip = 4)
raw_counts <- as.data.frame(sapply(counts.files, function(x) x[ , 2]))
data <- gsub( "Users/mmeynadier/Documents/PhD/species/Astroides/analysis/STARmapping/teixido/adult/sept2018", "", data )
data <- gsub( "_ReadsPerGene.out.tab", "", data )
data <- gsub( "./", "", data )
colnames(raw_counts) <- data
row.names(raw_counts) <- counts.files[[1]]$V1
raw_counts_sp <- raw_counts[,grep("sept2018_sp", colnames(raw_counts))]
raw_counts_pv <- raw_counts[,grep("sept2018_pv", colnames(raw_counts))]
raw_counts_gm <- raw_counts[,grep("sept2018_gm", colnames(raw_counts))]
samples_order_sp = c(8,9,10,11,12,13,14,15,16,1,2,3,4,5,6,7)
samples_order_pv = c(7,8,9,10,11,12,13,14,15,1,2,3,4,5,6)
samples_order_gm = c(1,2,3,4,5,6,7,8,9,10,18,19,20,21,22,23,24,11,12,13,14,15,16,17)
raw_counts_sp <- raw_counts_sp[,samples_order_sp]
raw_counts_pv <- raw_counts_pv[,samples_order_pv]
raw_counts_gm <- raw_counts_gm[,samples_order_gm]
#### SP ####
# DDS object
dds<-DESeqDataSetFromMatrix(countData = raw_counts_sp, colData = samplesSP,design = ~originSite_finalSite_experiment)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# pre-filtering
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]
# Differential expression analysis
dds<-DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 153 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
cbind(resultsNames(dds))
## [,1]
## [1,] "Intercept"
## [2,] "originSite_finalSite_experiment_sp_sp_bck_vs_sp_gm_gas"
## [3,] "originSite_finalSite_experiment_sp_sp_gas_vs_sp_gm_gas"
sp_sp_bck_VS_sp_sp_gas_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","sp_sp_bck","sp_sp_gas"), lfcThreshold = 1,alpha = 0.05)
sp_sp_bck_VS_sp_sp_gas_lfc2_intra<-trimFunction(sp_sp_bck_VS_sp_sp_gas_lfc2_intra,0.05,1)
sp_sp_bck_VS_sp_gm_gas_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","sp_sp_bck","sp_gm_gas"), lfcThreshold = 1,alpha = 0.05)
sp_sp_bck_VS_sp_gm_gas_lfc2_intra<-trimFunction(sp_sp_bck_VS_sp_gm_gas_lfc2_intra,0.05,1)
rownames1_intra = row.names(sp_sp_bck_VS_sp_gm_gas_lfc2_intra)
rownames2_intra = row.names(sp_sp_bck_VS_sp_sp_gas_lfc2_intra)
rownames_intra <- union(rownames1_intra,rownames2_intra)
rownames_intra <- data.frame(genes=rownames_intra)
rownames_annot_intra = merge(rownames_intra,A_pfam,by="genes")
rownames_annot_all_intra <- applyAnnot(rownames_intra,rownames_annot_intra)
vsd = vst(dds,blind=T)
newColNames <- samplesSP$originSite_finalSite_experiment
vsd_common_gm_lfc2_intra_sept2018_SP = heatmapFunction(newColNames,rownames_intra,rownames_annot_all_intra,vsd,"Sept2018 SP")

pcaData = plotPCA(vsd, intgroup="originSite_finalSite_experiment",returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = originSite_finalSite_experiment)) +
geom_point(size = 5) + theme_bw() +
scale_color_manual(values = c("#ff4040","#8B0000","#00008B","#6495ED")) +
geom_point() +
ggtitle("Principal Component Analysis of adult corals", subtitle = "sept2018 dataset - SP") +
theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance"))

# Inference statistics
count_tab_assay <- assay(vsd)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samplesSP,dist_tab_assay ~ originSite_finalSite_experiment, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_tab_assay ~ originSite_finalSite_experiment, data = samplesSP, method = "euclidian")
## Df SumOfSqs R2 F Pr(>F)
## originSite_finalSite_experiment 2 25006 0.22786 1.9181 0.004 **
## Residual 13 84738 0.77214
## Total 15 109743 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesSP$originSite_finalSite_experiment)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 sp_sp_bck vs sp_sp_gas 1 13544.645 2.196092 0.23880712 0.022 0.066
## 2 sp_sp_bck vs sp_gm_gas 1 17771.253 2.746860 0.25559651 0.004 0.012
## 3 sp_sp_gas vs sp_gm_gas 1 7729.014 1.140513 0.09394273 0.207 0.621
## sig
## 1
## 2 .
## 3
mod <- betadisper(dist_tab_assay,samplesSP$originSite_finalSite_experiment)
mod.HSD <- TukeyHSD(mod)
mod.HSD
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = distances ~ group, data = df)
##
## $group
## diff lwr upr p adj
## sp_sp_bck-sp_gm_gas -18.09482 -34.91844 -1.271211 0.0347559
## sp_sp_gas-sp_gm_gas -2.42658 -15.99021 11.137050 0.8853141
## sp_sp_gas-sp_sp_bck 15.66824 -1.57083 32.907318 0.0767997
#### PV ####
# DDS object
dds<-DESeqDataSetFromMatrix(countData = raw_counts_pv, colData = samplesPV,design = ~originSite_finalSite_experiment)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# pre-filtering
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]
# Differential expression analysis
dds<-DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
cbind(resultsNames(dds))
## [,1]
## [1,] "Intercept"
## [2,] "originSite_finalSite_experiment_pv_pv_bck_vs_pv_gm_gas"
## [3,] "originSite_finalSite_experiment_pv_pv_gas_vs_pv_gm_gas"
pv_pv_bck_VS_pv_pv_gas_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","pv_pv_bck","pv_pv_gas"), lfcThreshold = 1,alpha = 0.05)
pv_pv_bck_VS_pv_pv_gas_lfc2_intra<-trimFunction(pv_pv_bck_VS_pv_pv_gas_lfc2_intra,0.05,1)
pv_pv_bck_VS_pv_gm_gas_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","pv_pv_bck","pv_gm_gas"), lfcThreshold = 1,alpha = 0.05)
pv_pv_bck_VS_pv_gm_gas_lfc2_intra<-trimFunction(pv_pv_bck_VS_pv_gm_gas_lfc2_intra,0.05,1)
rownames1_intra = row.names(pv_pv_bck_VS_pv_gm_gas_lfc2_intra)
rownames2_intra = row.names(pv_pv_bck_VS_pv_pv_gas_lfc2_intra)
rownames_intra <- union(rownames1_intra,rownames2_intra)
rownames_intra <- data.frame(genes=rownames_intra)
rownames_annot_intra = merge(rownames_intra,A_pfam,by="genes")
rownames_annot_all_intra <- applyAnnot(rownames_intra,rownames_annot_intra)
vsd = vst(dds,blind=T)
newColNames <- samplesPV$originSite_finalSite_experiment
vsd_common_gm_lfc2_intra_sept2018_PV = heatmapFunction(newColNames,rownames_intra,rownames_annot_all_intra,vsd,"Sept2018 PV")

pcaData = plotPCA(vsd, intgroup="originSite_finalSite_experiment",returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = originSite_finalSite_experiment)) +
geom_point(size = 5) + theme_bw() +
scale_color_manual(values = c("#ff4040","#8B0000","#00008B","#6495ED")) +
geom_point() +
ggtitle("Principal Component Analysis of adult corals", subtitle = "sept2018 dataset - PV") +
theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance"))

# Inference statistics
count_tab_assay <- assay(vsd)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samplesPV,dist_tab_assay ~ originSite_finalSite_experiment, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_tab_assay ~ originSite_finalSite_experiment, data = samplesPV, method = "euclidian")
## Df SumOfSqs R2 F Pr(>F)
## originSite_finalSite_experiment 2 14237 0.17925 1.3104 0.018 *
## Residual 12 65187 0.82075
## Total 14 79424 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesPV$originSite_finalSite_experiment)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted sig
## 1 pv_pv_bck vs pv_pv_gas 1 8070.869 1.416911 0.1683410 0.058 0.174
## 2 pv_pv_bck vs pv_gm_gas 1 6709.340 1.340236 0.1606953 0.029 0.087
## 3 pv_pv_gas vs pv_gm_gas 1 6711.089 1.210123 0.1079491 0.066 0.198
mod <- betadisper(dist_tab_assay,samplesPV$originSite_finalSite_experiment)
mod.HSD <- TukeyHSD(mod)
mod.HSD
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = distances ~ group, data = df)
##
## $group
## diff lwr upr p adj
## pv_pv_bck-pv_gm_gas -8.325279 -21.7022907 5.051732 0.2596499
## pv_pv_gas-pv_gm_gas 5.765340 -5.1569438 16.687625 0.3676325
## pv_pv_gas-pv_pv_bck 14.090620 0.7136081 27.467631 0.0388841
#### GM ####
# DDS object
dds<-DESeqDataSetFromMatrix(countData = raw_counts_gm, colData = samplesGM,design = ~originSite_finalSite_experiment)
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
# pre-filtering
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]
# Differential expression analysis
dds<-DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 169 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
cbind(resultsNames(dds))
## [,1]
## [1,] "Intercept"
## [2,] "originSite_finalSite_experiment_gm_gm_gas_vs_gm_gm_bck"
## [3,] "originSite_finalSite_experiment_gm_pv_gas_vs_gm_gm_bck"
## [4,] "originSite_finalSite_experiment_gm_sp_gas_vs_gm_gm_bck"
gm_gm_bck_VS_gm_gm_gas_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","gm_gm_bck","gm_gm_gas"), lfcThreshold = 1,alpha = 0.05)
gm_gm_bck_VS_gm_gm_gas_lfc2_intra<-trimFunction(gm_gm_bck_VS_gm_gm_gas_lfc2_intra,0.05,1)
gm_gm_bck_VS_gm_sp_gas_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","gm_gm_bck","gm_sp_gas"), lfcThreshold = 1,alpha = 0.05)
gm_gm_bck_VS_gm_sp_gas_lfc2_intra<-trimFunction(gm_gm_bck_VS_gm_sp_gas_lfc2_intra,0.05,1)
gm_gm_bck_VS_gm_pv_gas_lfc2_intra<-results(dds, contrast=c("originSite_finalSite_experiment","gm_gm_bck","gm_pv_gas"), lfcThreshold = 1,alpha = 0.05)
gm_gm_bck_VS_gm_pv_gas_lfc2_intra<-trimFunction(gm_gm_bck_VS_gm_pv_gas_lfc2_intra,0.05,1)
rownames1_intra = row.names(gm_gm_bck_VS_gm_gm_gas_lfc2_intra)
rownames2_intra = row.names(gm_gm_bck_VS_gm_sp_gas_lfc2_intra)
rownames3_intra = row.names(gm_gm_bck_VS_gm_pv_gas_lfc2_intra)
rownames_intra <- union(rownames1_intra,rownames2_intra)
rownames_intra <- union(rownames_intra,rownames3_intra)
rownames_intra <- data.frame(genes=rownames_intra)
rownames_annot_intra = merge(rownames_intra,A_pfam,by="genes")
rownames_annot_all_intra <- applyAnnot(rownames_intra,rownames_annot_intra)
vsd = vst(dds,blind=T)
newColNames <- samplesGM$originSite_finalSite_experiment
vsd_common_gm_lfc2_intra_sept2018_GM = heatmapFunction(newColNames,rownames_intra,rownames_annot_all_intra,vsd,"Sept2018 GM")

pcaData = plotPCA(vsd, intgroup="originSite_finalSite_experiment",returnData=TRUE,ntop=200)
percentVar = round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, colour = originSite_finalSite_experiment)) +
geom_point(size = 5) + theme_bw() +
scale_color_manual(values = c("#ff4040","#8B0000","#00008B","#6495ED")) +
geom_point() +
ggtitle("Principal Component Analysis of adult corals", subtitle = "sept2018 dataset - GM") +
theme(text = element_text(size=14),legend.text = element_text(size=12), legend.position = 'bottom') +
xlab(paste0("PC1: ",percentVar[1],"% variance")) +
ylab(paste0("PC2: ",percentVar[2],"% variance"))

# Inference statistics
count_tab_assay <- assay(vsd)
dist_tab_assay <- dist(t(count_tab_assay),method="euclidian")
ad<-adonis2(data=samplesGM,dist_tab_assay ~ originSite_finalSite_experiment, method="euclidian")
ad
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = dist_tab_assay ~ originSite_finalSite_experiment, data = samplesGM, method = "euclidian")
## Df SumOfSqs R2 F Pr(>F)
## originSite_finalSite_experiment 3 17281 0.18819 1.5454 0.001 ***
## Residual 20 74548 0.81181
## Total 23 91829 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pair.mod<-pairwise.adonis(dist_tab_assay,factors=samplesGM$originSite_finalSite_experiment)
pair.mod
## pairs Df SumsOfSqs F.Model R2 p.value p.adjusted
## 1 gm_gm_bck vs gm_gm_gas 1 5859.268 1.381915 0.14729558 0.054 0.324
## 2 gm_gm_bck vs gm_sp_gas 1 7460.477 1.837561 0.18679026 0.010 0.060
## 3 gm_gm_bck vs gm_pv_gas 1 8543.803 2.027042 0.20215756 0.009 0.054
## 4 gm_gm_gas vs gm_sp_gas 1 4617.489 1.357118 0.10160263 0.023 0.138
## 5 gm_gm_gas vs gm_pv_gas 1 5161.401 1.472287 0.10928266 0.016 0.096
## 6 gm_sp_gas vs gm_pv_gas 1 4229.057 1.249087 0.09427719 0.061 0.366
## sig
## 1
## 2
## 3
## 4
## 5
## 6
mod <- betadisper(dist_tab_assay,samplesGM$originSite_finalSite_experiment)
mod.HSD <- TukeyHSD(mod)
mod.HSD
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = distances ~ group, data = df)
##
## $group
## diff lwr upr p adj
## gm_gm_gas-gm_gm_bck -8.5715381 -27.21395 10.070873 0.5813917
## gm_pv_gas-gm_gm_bck -9.0095738 -27.65198 9.632837 0.5418815
## gm_sp_gas-gm_gm_bck -10.4828320 -29.12524 8.159579 0.4150743
## gm_pv_gas-gm_gm_gas -0.4380357 -14.87838 14.002313 0.9997728
## gm_sp_gas-gm_gm_gas -1.9112939 -16.35164 12.529055 0.9821419
## gm_sp_gas-gm_pv_gas -1.4732582 -15.91361 12.967091 0.9916254